SUMMARY = "Updates the NVD CVE database"
LICENSE = "MIT"

# Important note:
# This product uses the NVD API but is not endorsed or certified by the NVD.

INHIBIT_DEFAULT_DEPS = "1"

inherit native

# This recipe defines its own do_fetch and do_unpack; remove the remaining
# build tasks listed below.
deltask do_patch
deltask do_configure
deltask do_compile
deltask do_install
deltask do_populate_sysroot

# Endpoint queried by do_fetch to download CVE entries.
NVDCVE_URL ?= "https://services.nvd.nist.gov/rest/json/cves/2.0"

# If you have an NVD API key (https://nvd.nist.gov/developers/request-an-api-key),
# set it here to get higher rate limits. When a key is set, the delay between
# consecutive requests is reduced.
NVDCVE_API_KEY ?= ""

# CVE database update interval, in seconds. By default: 23 hours (23*60*60),
# i.e. roughly once a day.
# Use 0 to force the update
# Use a negative value to skip the update
CVE_DB_UPDATE_INTERVAL ?= "82800"

# CVE database incremental update age threshold, in seconds. If the database is
# older than this threshold, do a full re-download; otherwise, do an incremental
# update. By default: the maximum allowed value from NVD: 120 days (120*24*60*60)
# Use 0 to force a full download.
CVE_DB_INCR_UPDATE_AGE_THRES ?= "10368000"

# Number of attempts for each HTTP query to the NVD server before giving up
CVE_DB_UPDATE_ATTEMPTS ?= "5"

# Database file kept in the download directory (updated by do_fetch and copied
# to CVE_CHECK_DB_FILE by do_unpack), the lock file taken by both tasks, and
# the temporary working copy that do_fetch updates before swapping it in.
CVE_CHECK_DB_DLDIR_FILE ?= "${DL_DIR}/CVE_CHECK2/${CVE_CHECK_DB_FILENAME}"
CVE_CHECK_DB_DLDIR_LOCK ?= "${CVE_CHECK_DB_DLDIR_FILE}.lock"
CVE_CHECK_DB_TEMP_FILE ?= "${CVE_CHECK_DB_FILE}.tmp"

python () {
    # The database produced here is only consumed through the cve-check
    # class, so the recipe is skipped when that class is not inherited.
    cve_check_inherited = bb.data.inherits_class("cve-check", d)
    if not cve_check_inherited:
        raise bb.parse.SkipRecipe("Skip recipe when cve-check class is not loaded.")
}

python do_fetch() {
    """
    Update NVD database with API 2.0

    The update is performed on a temporary copy of the database
    (CVE_CHECK_DB_TEMP_FILE); the copy replaces CVE_CHECK_DB_DLDIR_FILE only
    when the whole update succeeded, so a failed download never corrupts the
    existing database.
    """
    import bb.utils
    import bb.progress
    import shutil
    import time

    bb.utils.export_proxies(d)

    db_file = d.getVar("CVE_CHECK_DB_DLDIR_FILE")
    db_dir = os.path.dirname(db_file)
    db_tmp_file = d.getVar("CVE_CHECK_DB_TEMP_FILE")

    # Remove leftovers of a previous, interrupted run
    cleanup_db_download(db_tmp_file)
    # By default let's update the whole database (since time 0)
    database_time = 0

    # The NVD database changes once a day, so no need to update more frequently
    # Allow the user to force-update
    try:
        update_interval = int(d.getVar("CVE_DB_UPDATE_INTERVAL"))
        if update_interval < 0:
            bb.note("CVE database update skipped")
            if not os.path.exists(db_file):
                bb.error("CVE database %s not present, database fetch/update skipped" % db_file)
            return
        curr_time = time.time()
        database_time = os.path.getmtime(db_file)
        bb.note("Current time: %s; DB time: %s" % (time.ctime(curr_time), time.ctime(database_time)))
        if curr_time < database_time:
            bb.warn("Database time is in the future, force DB update")
            database_time = 0
        elif curr_time - database_time < update_interval:
            bb.note("CVE database recently updated, skipping")
            return

    except OSError:
        # getmtime() failed: no usable database yet, keep database_time at 0
        # so that a full download is performed
        pass

    if bb.utils.to_boolean(d.getVar("BB_NO_NETWORK")):
        bb.error("BB_NO_NETWORK attempted to disable fetch, this recipe uses CVE_DB_UPDATE_INTERVAL to control download, set to '-1' to disable fetch or update")

    bb.utils.mkdirhier(db_dir)
    bb.utils.mkdirhier(os.path.dirname(db_tmp_file))
    # Start from the current database so that an incremental update only has
    # to apply the changes
    if os.path.exists(db_file):
        shutil.copy2(db_file, db_tmp_file)

    if update_db_file(db_tmp_file, d, database_time):
        # Update downloaded correctly, can swap files
        shutil.move(db_tmp_file, db_file)
    else:
        # Update failed, do not modify the database. Use the shared cleanup
        # helper so that the sqlite journal is removed together with the
        # temporary database, and a missing file is not an error.
        bb.warn("CVE database update failed")
        cleanup_db_download(db_tmp_file)
}

do_fetch[lockfiles] += "${CVE_CHECK_DB_DLDIR_LOCK}"
do_fetch[file-checksums] = ""
do_fetch[vardeps] = ""

python do_unpack() {
    """
    Copy the downloaded database (CVE_CHECK_DB_DLDIR_FILE) to the location
    given by CVE_CHECK_DB_FILE.
    """
    from shutil import copyfile

    downloaded_db = d.getVar("CVE_CHECK_DB_DLDIR_FILE")
    target_db = d.getVar("CVE_CHECK_DB_FILE")
    copyfile(downloaded_db, target_db)
}
do_unpack[lockfiles] += "${CVE_CHECK_DB_DLDIR_LOCK} ${CVE_CHECK_DB_FILE_LOCK}"

def cleanup_db_download(db_tmp_file):
    """
    Remove what a previous, failed download may have left behind: the
    "<db_tmp_file>-journal" file and the temporary database itself.
    Files that do not exist are silently ignored.
    """

    journal_file = "{0}-journal".format(db_tmp_file)

    # The journal goes first, then the temporary database
    for leftover in (journal_file, db_tmp_file):
        if os.path.exists(leftover):
            os.remove(leftover)

def nvd_request_wait(attempt, min_wait):
    """
    Return the number of seconds to wait before retrying a request: the
    minimum wait plus two seconds per attempt already made, capped at 30.
    """
    max_wait = 30
    backoff = min_wait + 2 * attempt
    return max_wait if max_wait < backoff else backoff

def nvd_request_next(url, attempts, api_key, args, min_wait):
    """
    Request next part of the NVD database
    NVD API documentation: https://nvd.nist.gov/developers/vulnerabilities

    url:      base URL of the API endpoint
    attempts: maximum number of times the request is tried
    api_key:  optional NVD API key, sent in the "apiKey" header when set
    args:     query parameters, url-encoded and appended to url
    min_wait: minimum delay in seconds between two attempts

    Returns the response body as a str (gzip-encoded responses are
    decompressed first), or None when every attempt failed.
    """

    import urllib.request
    import urllib.parse
    import gzip
    import time

    request = urllib.request.Request(url + "?" + urllib.parse.urlencode(args))
    if api_key:
        request.add_header("apiKey", api_key)
    bb.note("Requesting %s" % request.full_url)

    for attempt in range(attempts):
        try:
            # The context manager closes the response even when reading or
            # decompressing fails
            with urllib.request.urlopen(request) as r:
                body = r.read()
                if r.headers.get('content-encoding') == 'gzip':
                    body = gzip.decompress(body)
            # Always hand back text, whatever the transfer encoding was, so
            # that callers can rely on a single return type
            raw_data = body.decode("utf-8")
        except Exception as e:
            # Best effort: any failure (network, HTTP status, decoding) just
            # triggers another attempt
            bb.note("CVE database: received error (%s)" % (e))
            if attempt + 1 < attempts:
                wait_time = nvd_request_wait(attempt, min_wait)
                bb.note("CVE database: retrying download after %d seconds. attempted (%d/%d)" % (wait_time, attempt+1, attempts))
                time.sleep(wait_time)
        else:
            return raw_data

    # We failed at all attempts
    return None

def update_db_file(db_tmp_file, d, database_time):
    """
    Update the given database file

    db_tmp_file:   path of the sqlite database to update
    d:             datastore used to read the configuration variables
    database_time: modification time of the existing database, or 0 to
                   request a full download

    Returns True when all the entries were downloaded and stored, False
    when the download failed. The database connection is closed in both
    cases; changes are only committed on success.
    """
    import bb.progress
    import bb.utils
    import datetime
    import sqlite3
    import json
    import time

    # Connect to database
    conn = sqlite3.connect(db_tmp_file)
    try:
        initialize_db(conn)

        req_args = {'startIndex': 0}

        incr_update_threshold = int(d.getVar("CVE_DB_INCR_UPDATE_AGE_THRES"))
        if database_time != 0:
            database_date = datetime.datetime.fromtimestamp(database_time, tz=datetime.timezone.utc)
            today_date = datetime.datetime.now(tz=datetime.timezone.utc)
            delta = today_date - database_date
            if incr_update_threshold == 0:
                bb.note("CVE database: forced full update")
            elif delta < datetime.timedelta(seconds=incr_update_threshold):
                bb.note("CVE database: performing partial update")
                # The maximum range for time is 120 days
                if delta > datetime.timedelta(days=120):
                    bb.error("CVE database: Trying to do an incremental update on a larger than supported range")
                req_args['lastModStartDate'] = database_date.isoformat()
                req_args['lastModEndDate'] = today_date.isoformat()
            else:
                bb.note("CVE database: file too old, forcing a full update")
        else:
            bb.note("CVE database: no preexisting database, do a full download")

        with bb.progress.ProgressHandler(d) as ph, open(os.path.join(d.getVar("TMPDIR"), 'cve_check'), 'a') as cve_f:

            bb.note("Updating entries")
            index = 0
            url = d.getVar("NVDCVE_URL")
            api_key = d.getVar("NVDCVE_API_KEY") or None
            attempts = int(d.getVar("CVE_DB_UPDATE_ATTEMPTS"))

            # Recommended by NVD
            wait_time = 6
            if api_key:
                wait_time = 2

            while True:
                req_args['startIndex'] = index
                raw_data = nvd_request_next(url, attempts, api_key, req_args, wait_time)
                if raw_data is None:
                    # We haven't managed to download data
                    return False

                # The payload may come back as bytes; normalize it so that
                # the textual check below works in both cases
                if isinstance(raw_data, bytes):
                    raw_data = raw_data.decode("utf-8")

                # hack for json5 style responses
                if raw_data[-3:] == ',]}':
                    bb.note("Removing trailing ',' from nvd response")
                    raw_data = raw_data[:-3] + ']}'

                data = json.loads(raw_data)

                index = data["startIndex"]
                total = data["totalResults"]
                per_page = data["resultsPerPage"]
                bb.note("Got %d entries" % per_page)
                for cve in data["vulnerabilities"]:
                    update_db(conn, cve)

                index += per_page
                ph.update((float(index) / (total+1)) * 100)
                if index >= total:
                    break

                if per_page <= 0:
                    # No progress is possible: asking for the same page again
                    # would loop forever
                    bb.warn("CVE database: NVD returned an empty page before the end of the results")
                    return False

                # Recommended by NVD
                time.sleep(wait_time)

            # Update success, set the date to cve_check file.
            cve_f.write('CVE database update : %s\n\n' % datetime.date.today())

        conn.commit()
        return True
    finally:
        # Closing without a commit discards the pending changes, which is
        # what we want on the failure paths
        conn.close()

def initialize_db(conn):
    """
    Create the META, NVD and PRODUCTS tables and the PRODUCTS index on ID
    if they do not exist yet. The statements run inside a single
    transaction managed by the connection.
    """
    schema_statements = (
        "CREATE TABLE IF NOT EXISTS META (YEAR INTEGER UNIQUE, DATE TEXT)",
        "CREATE TABLE IF NOT EXISTS NVD (ID TEXT UNIQUE, SUMMARY TEXT, \
            SCOREV2 TEXT, SCOREV3 TEXT, SCOREV4 TEXT, MODIFIED INTEGER, VECTOR TEXT, VECTORSTRING TEXT)",
        "CREATE TABLE IF NOT EXISTS PRODUCTS (ID TEXT, \
            VENDOR TEXT, PRODUCT TEXT, VERSION_START TEXT, OPERATOR_START TEXT, \
            VERSION_END TEXT, OPERATOR_END TEXT)",
        "CREATE INDEX IF NOT EXISTS PRODUCT_ID_IDX on PRODUCTS(ID);",
    )

    with conn:
        cursor = conn.cursor()
        for statement in schema_statements:
            cursor.execute(statement)
        cursor.close()

def parse_node_and_insert(conn, node, cveId):
    """
    Insert into the PRODUCTS table one row per vulnerable CPE match found in
    the given configuration node.

    conn:  sqlite connection holding the PRODUCTS table
    node:  configuration node; its optional 'cpeMatch' list is read
    cveId: CVE identifier stored in the ID column of each row

    Entries which are not marked vulnerable, have no criteria, or whose
    criteria is too short to carry a version are skipped individually; they
    do not prevent the remaining entries of the node from being stored.
    """

    def cpe_generator():
        for cpe in node.get('cpeMatch', ()):
            if not cpe['vulnerable']:
                continue
            cpe23 = cpe.get('criteria')
            if not cpe23:
                continue
            cpe23 = cpe23.split(':')
            if len(cpe23) < 6:
                continue
            vendor = cpe23[3]
            product = cpe23[4]
            version = cpe23[5]

            # The update component is optional here: a criteria that stops
            # right after the version is handled like a wildcard update
            update = cpe23[6] if len(cpe23) > 6 else '*'
            if update == '*' or update == '-':
                version_suffix = ""
            else:
                version_suffix = "_" + update

            if version != '*' and version != '-':
                # Version is defined, this is a '=' match
                yield [cveId, vendor, product, version + version_suffix, '=', '', '']
            elif version == '-':
                # no version information is available
                yield [cveId, vendor, product, version, '', '', '']
            else:
                # Parse start version, end version and operators
                op_start = ''
                op_end = ''
                v_start = ''
                v_end = ''

                if 'versionStartIncluding' in cpe:
                    op_start = '>='
                    v_start = cpe['versionStartIncluding']

                if 'versionStartExcluding' in cpe:
                    op_start = '>'
                    v_start = cpe['versionStartExcluding']

                if 'versionEndIncluding' in cpe:
                    op_end = '<='
                    v_end = cpe['versionEndIncluding']

                if 'versionEndExcluding' in cpe:
                    op_end = '<'
                    v_end = cpe['versionEndExcluding']

                if op_start or op_end or v_start or v_end:
                    yield [cveId, vendor, product, v_start, op_start, v_end, op_end]
                else:
                    # This is no version information, expressed differently.
                    # Save processing by representing as -.
                    yield [cveId, vendor, product, '-', '', '', '']

    conn.executemany("insert into PRODUCTS values (?, ?, ?, ?, ?, ?, ?)", cpe_generator()).close()

def update_db(conn, elt):
    """
    Update a single entry in the on-disk database

    conn: sqlite connection holding the NVD and PRODUCTS tables
    elt:  one vulnerability entry; the CVE data is read from elt['cve']

    A CVE whose vulnStatus is "Rejected" is removed from both tables.
    Otherwise its NVD row is inserted or replaced and its PRODUCTS rows are
    rebuilt from the configurations of the entry. Missing CVSS metrics
    (absent key or empty list) result in a score of 0.0 and, when no metric
    provides them, in "UNKNOWN" vector values.
    """

    cve = elt['cve']
    cveId = cve['id']
    if cve.get('vulnStatus') == "Rejected":
        c = conn.cursor()
        c.execute("delete from PRODUCTS where ID = ?;", [cveId])
        c.execute("delete from NVD where ID = ?;", [cveId])
        c.close()
        return

    # The last English description wins
    cveDesc = ""
    for desc in cve['descriptions']:
        if desc['lang'] == 'en':
            cveDesc = desc['value']
    date = cve['lastModified']

    metrics = cve.get('metrics', {})
    # A metric can be absent (KeyError) or present as an empty list
    # (IndexError); both mean that no data is available for that version
    metric_missing = (KeyError, IndexError)

    accessVector = None
    vectorString = None

    try:
        cvss_v2 = metrics['cvssMetricV2'][0]['cvssData']
        accessVector = cvss_v2['accessVector']
        vectorString = cvss_v2['vectorString']
        cvssv2 = cvss_v2['baseScore']
    except metric_missing:
        cvssv2 = 0.0

    # For the vector values the first version providing them wins, in the
    # order v2, v3.0, v3.1, v4.0. For the v3 score, v3.0 is preferred to v3.1.
    cvssv3 = None
    try:
        cvss_v30 = metrics['cvssMetricV30'][0]['cvssData']
        accessVector = accessVector or cvss_v30['attackVector']
        vectorString = vectorString or cvss_v30['vectorString']
        cvssv3 = cvss_v30['baseScore']
    except metric_missing:
        pass
    try:
        cvss_v31 = metrics['cvssMetricV31'][0]['cvssData']
        accessVector = accessVector or cvss_v31['attackVector']
        vectorString = vectorString or cvss_v31['vectorString']
        cvssv3 = cvssv3 or cvss_v31['baseScore']
    except metric_missing:
        pass
    cvssv3 = cvssv3 or 0.0

    try:
        cvss_v40 = metrics['cvssMetricV40'][0]['cvssData']
        accessVector = accessVector or cvss_v40['attackVector']
        vectorString = vectorString or cvss_v40['vectorString']
        cvssv4 = cvss_v40['baseScore']
    except metric_missing:
        cvssv4 = 0.0

    accessVector = accessVector or "UNKNOWN"
    vectorString = vectorString or "UNKNOWN"

    conn.execute("insert or replace into NVD values (?, ?, ?, ?, ?, ?, ?, ?)",
                [cveId, cveDesc, cvssv2, cvssv3, cvssv4, date, accessVector, vectorString]).close()

    try:
        # Remove any pre-existing CVE configuration. Even for partial database
        # update, those will be repopulated. This ensures that old
        # configuration is not kept for an updated CVE.
        conn.execute("delete from PRODUCTS where ID = ?", [cveId]).close()
        for config in cve['configurations']:
            # This is suboptimal as it doesn't handle AND/OR and negate, but is better than nothing
            for node in config["nodes"]:
                parse_node_and_insert(conn, node, cveId)
    except KeyError:
        bb.note("CVE %s has no configurations" % cveId)

# Do not write a stamp for do_fetch, so the task is run every time;
# CVE_DB_UPDATE_INTERVAL, checked inside do_fetch, decides whether anything
# is actually downloaded.
do_fetch[nostamp] = "1"

# Keep this recipe out of world builds.
EXCLUDE_FROM_WORLD = "1"
